import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
III Categorical data
%%javascript
// Replace the output area's scroll predicate so it always answers "no".
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false; // disable auto scrolling
}
# III Visualization of distributional data (“displot”)
# Load seaborn's "penguins" example dataset and preview its first rows.
penguins = sns.load_dataset("penguins")
penguins.head()
| | species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex |
---|---|---|---|---|---|---|---|
0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | Male |
1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | Female |
2 | Adelie | Torgersen | 40.3 | 18.0 | 195.0 | 3250.0 | Female |
3 | Adelie | Torgersen | NaN | NaN | NaN | NaN | NaN |
4 | Adelie | Torgersen | 36.7 | 19.3 | 193.0 | 3450.0 | Female |
# Load seaborn's "tips" example dataset and preview its first rows.
tips = sns.load_dataset("tips")
tips.head()
| | total_bill | tip | sex | smoker | day | time | size |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
Categorical scatterplots
# Categorical scatterplot of tip amount per day, drawn without jitter.
sns.catplot(
    data=tips,
    x="day",
    y="tip",
    # kind='strip'  # default is 'strip'
    jitter=False,  # default is True
)
# Swarm plot of tip amount per day.
sns.catplot(
    data=tips,
    x="day",
    y="tip",
    kind="swarm",
)
/home/pierro/mambaforge/lib/python3.10/site-packages/seaborn/categorical.py:3544: UserWarning:
8.1% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
# Swarm plot of tip amount per day, colored by meal time.
sns.catplot(
    data=tips,
    x="day",
    y="tip",
    hue="time",
    kind="swarm",
)
/home/pierro/mambaforge/lib/python3.10/site-packages/seaborn/categorical.py:3544: UserWarning:
8.1% of the points cannot be placed; you may want to decrease the size of the markers or use stripplot.
# Total bill per day, colored by party size, with one panel per sex.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="size",
    col="sex",
)
# Specify the ordering of the categorical values
sns.catplot(
    data=tips,
    x="day",
    y="tip",
    order=["Thur", "Fri", "Sat", "Sun"],
)
# Swapping orientation: the numeric variable goes on x, the categorical on y.
sns.catplot(
    data=tips,
    x="total_bill",
    y="day",
    hue="time",
    col="sex",
)
Comparing distributions along categorical variables (mean, median, quantiles)
Boxplot using kind='box'
# Box plot of total bill per day.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    kind="box",
)
# Box plot of total bill per day, split by smoker status.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="smoker",
    kind="box",
)
Violin plots with kind='violin'
# Total bill per day colored by sex; no kind is passed, so catplot uses its
# default ('strip').
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
)
# Violin plot of total bill per day, colored by sex.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    kind="violin",
)
# Have the two populations' violins "stick" to each other with split=True;
# one panel is drawn per meal time.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    col="time",
    kind="violin",
    split=True,
)
# Split violins per sex, with inner='stick' to mark the individual
# observations inside each violin.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    kind="violin",
    inner="stick",
    split=True,
)
# Showing additional dimensions with col=(...) or row=(...)
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    col="sex",
    kind="violin",
    inner="stick",
    split=True,
)
Bar plots
# Bar plot of total bill per day, grouped by sex.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    kind="bar",
)
# Bar plot of total bill per day, grouped by sex, with confidence-interval
# error bars.
# NOTE(review): the keyword name for "ci" was lost in the source; `errorbar=`
# (seaborn >= 0.12) is inferred from the trailing comment -- confirm.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    kind="bar",
    errorbar="ci",  # standard confidence interval errorbar
)
# Bar plot of total bill per day, grouped by sex, with standard-deviation
# error bars.
# NOTE(review): the keyword name for 'sd' was lost in the source; `errorbar=`
# (seaborn >= 0.12) is inferred from the trailing comment -- confirm.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    kind="bar",
    errorbar="sd",  # standard deviation 'sd' errorbar
)
Counting data with kind='count'
# Count the observations per day, grouped by sex (no y variable is passed).
sns.catplot(
    data=tips,
    x="day",
    hue="sex",
    kind="count",
)
Point plots to emphasize continuity
# Point plot of tip per day, one panel per sex, using '<' markers.
sns.catplot(
    data=tips,
    x="day",
    y="tip",
    col="sex",
    kind="point",
    markers="<",
)